# GO TO http://cran.r-project.org/ AND DOWNLOAD THE LATEST R RELEASE: R-2.x.y.
# http://www.staff.ul.ie/mackenzieg/Assess/R_Integration_Tools/r_integration_tools.html (Crash Course in R)
#
# https://educationalresearchtechniques.com/2017/02/24/subset-regression-in-r/
# STEPWISE REGRESSIONS USING AIC = -2*log-likelihood + k*npar
#
library(foreign)
library(leaps)
library(Ecdat)
library(car)
library(lmtest)
# Best-subset and stepwise regression of `suicide` on the other variables in
# suicides.sav.
#
# Objects left in the workspace for the rest of the script:
#   x, x1                  raw read.spss() result and its data.frame form
#   m_full, m_full_step    model with every predictor, and its step() reduction
#   sub.fit, best.summary  regsubsets() search and its summary
#   subdat                 suicide, prestige and income only
#   m1, m2                 model fitted to subdat, and its step() reduction

# Import the SPSS file and convert the result to a data frame.
x <- read.spss("U:\\My Documents\\suicides.sav")
x1 <- data.frame(x)
x1

# NOTE: the former attach(x1, 2) call was removed. Every call below receives
# the data through `data =` (or as the first argument of subset()), so nothing
# depended on it; it only left a copy of the columns on the search path.

# Model with every available predictor, and its variance inflation factors.
m_full <- lm(suicide ~ ., data = x1)
vif(m_full)

# Best-subset search over the same predictors.
sub.fit <- regsubsets(suicide ~ ., data = x1)
best.summary <- summary(sub.fit)

# Show Mallows' Cp by subset size next to the regsubsets Cp plot, then put the
# previous plot layout back so later plots are not squeezed into a 1 x 2 grid.
old_par <- par(mfrow = c(1, 2))
plot(best.summary$cp)
plot(sub.fit, scale = "Cp")
par(old_par)

# AIC-based stepwise reduction of the full model. Kept under its own name so
# that it is not overwritten by the two-predictor analysis below.
m_full_step <- step(m_full)

# Repeat the analysis with prestige and income as the only predictors.
# m1 and m2 refer to this reduced analysis from here on.
subdat <- subset(x1, select = c(suicide, prestige, income))
m1 <- lm(suicide ~ ., data = subdat)
m2 <- step(m1)

# Single-term deletions with F tests for the two-predictor model.
library(MASS)
dropterm(m1, test = "F")

>>>>>>>>>>>>>>>>>>>>> OUTPUT

> vif(m1)
prestige   income educatio 
5.875118 2.660210 3.859671 


> sub.fit <- regsubsets(suicide~.,data=x1)
> best.summary <- summary(sub.fit)


> best.summary

Subset selection object
Call: regsubsets.formula(suicide ~ ., data = x1)
3 Variables  (and intercept)
         Forced in Forced out
prestige     FALSE      FALSE
income       FALSE      FALSE
educatio     FALSE      FALSE
1 subsets of each size up to 3
Selection Algorithm: exhaustive
         prestige income educatio
1  ( 1 ) " "      "*"    " "     
2  ( 1 ) " "      "*"    "*"     
3  ( 1 ) "*"      "*"    "*"     
> best.summary$cp
[1] 10.509832  4.358393  4.000000

>par(mfrow=c(1,2))
>plot(best.summary$cp)
>plot(sub.fit,scale = "Cp")

SEE SUBSETS IN R GRAPH.PDF

> subdat <- subset(x1,select=c(suicide,prestige,income))
> m1 <- lm(suicide ~ ., data=subdat)
> m2 <- step(m1)
Start:  AIC=179.38
suicide ~ prestige + income

           Df Sum of Sq    RSS    AIC
- prestige  1    127.50 4572.9 178.40
<none>                  4445.4 179.38
- income    1    410.97 4856.3 180.56

Step:  AIC=178.4
suicide ~ income

         Df Sum of Sq    RSS    AIC
<none>                4572.9 178.40
- income  1    341.52 4914.4 178.99

> dropterm(m1,test="F")

Single term deletions

Model:
suicide ~ prestige + income
         Df Sum of Sq    RSS    AIC F Value  Pr(F)  
<none>                4445.4 179.38                 
prestige  1    127.50 4572.9 178.40 0.94651 0.3377  
income    1    410.97 4856.3 180.56 3.05085 0.0900 .
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#
# FOR THE MODEL: SUICIDE = C + A * PRESTIGE + B * INCOME + ERROR  (C = CONSTANT TERM);
#
#
# Log likelihood = -n/2 ln(2 Pi s-squared) - n/2,  where s-squared = RSS/n
#                = -n/2 ln(2 Pi RSS/n)     - n/2
#                = -36/2 ln(6.28 x 4445.4/36) - 36/2 = -137.77     (2 Pi = 6.28)
#
#            AIC = -2 x -137.77 + 2 x 4 = 283.54     (4 parameters: constant, 2 slopes, error variance)
#
# Print the log-likelihood of the two-predictor model m1, for comparison with
# the hand calculation above (-137.77, counted with 4 parameters).
library(stats)
logLik(object = m1)
#
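# In step(), -2 log L is computed from the deviance (RSS) as n ln(RSS/n): it drops the additive
# constant n ln(2 Pi) + n that logLik() includes, and the error variance is not counted as a
# parameter. Hence the AIC printed by step() differs from the AIC based on logLik().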
#
#In Step:     AIC = n ln(RSS/n) + 2 df(model)
#                 = 36 ln(4445.4/36) + 2 x 3
#                 = 179.38
#
# Mallows Cp (SPSS syntax)
#
# Cp = [RSS / MSE(full model)] + 2 df(model) - n  (income only predicting suicides)
#    = [4572.87 / 134.71] + 2 x 2 - 36
#    = 1.946
#
# where MSE(full model) = 4445.4 / (36 - 3) = 134.71 for the model suicide ~ prestige + income.
#
# Cp = [4914.39 / 134.71] + (2 x 1) - 36 for null model with just constant term
#    = 2.48 > 1.946 (= Cp using income only as predictor)
#
# AIC = 36 ln(4914.39/36) + 2 =  178.99 > 178.4 (= AIC using income only as a predictor)